For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.
The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.
The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2",
"S", "Cl", "K2O", "CaO", "Ti", "V", "Cr",
"Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As",
"Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo",
"Rh", "Pd", "Ag", "Cd", "Sn", "Sb",
"La", "Ce", "Hf", "Ta", "W", "Pt", "Au",
"Hg", "Tl", "Pb", "Bi", "Th", "U")
pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)
# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))
# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"
# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)
# Removing all columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)
# Removing all columns that contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)
pxrf_no_na:
summary(pxrf_no_na)
## Area Type Al2O3 SiO2
## Area D Acropolis:19 mud mortar : 1 Min. :-1.20092 Min. :-1.3142038
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.49712 1st Qu.:-0.4978527
## Builder's floor : 5 mudbrick :35 Median :-0.23740 Median :-0.0003512
## Hellenistic : 1 PEM : 6 Mean : 0.04576 Mean : 0.0389010
## 3rd Qu.: 0.54615 3rd Qu.: 0.5568590
## Max. : 1.88920 Max. : 1.3770734
## K2O CaO Ti Mn
## Min. :-1.08731 Min. :-1.56460 Min. :-1.17385 Min. :-1.00032
## 1st Qu.:-0.54550 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.:-0.50265
## Median :-0.02978 Median : 0.07788 Median :-0.04881 Median :-0.13876
## Mean : 0.04621 Mean : 0.06819 Mean : 0.04264 Mean : 0.03671
## 3rd Qu.: 0.39368 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.44988
## Max. : 2.35094 Max. : 1.91455 Max. : 2.05476 Max. : 1.93487
## Fe Cu Zn Rb
## Min. :-1.10643 Min. :-1.50957 Min. :-1.03131 Min. :-1.28223
## 1st Qu.:-0.49989 1st Qu.:-0.49095 1st Qu.:-0.61933 1st Qu.:-0.54198
## Median :-0.09144 Median : 0.18813 Median :-0.20735 Median :-0.13073
## Mean : 0.04543 Mean : 0.04846 Mean : 0.02864 Mean : 0.04121
## 3rd Qu.: 0.46449 3rd Qu.: 0.64086 3rd Qu.: 0.29970 3rd Qu.: 0.68149
## Max. : 2.31412 Max. : 1.88584 Max. : 2.45465 Max. : 1.92553
## Sr Zr
## Min. :-1.85766 Min. :-1.61460
## 1st Qu.:-0.41969 1st Qu.:-0.60738
## Median : 0.10393 Median : 0.05199
## Mean : 0.07807 Mean : 0.05398
## 3rd Qu.: 0.51031 3rd Qu.: 0.55041
## Max. : 2.08896 Max. : 2.11835
pxrf_all:
summary(pxrf_all)
## Area Type MgO Al2O3
## Area D Acropolis:19 mud mortar : 1 Min. :-1.23821 Min. :-1.20092
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.43843 1st Qu.:-0.49712
## Builder's floor : 5 mudbrick :35 Median : 0.02957 Median :-0.23740
## Hellenistic : 1 PEM : 6 Mean : 0.03914 Mean : 0.04576
## 3rd Qu.: 0.42944 3rd Qu.: 0.54615
## Max. : 1.75657 Max. : 1.88920
## NA's :1
## SiO2 S Cl K2O
## Min. :-1.3142038 Min. :-0.2340 Min. :-0.88004 Min. :-1.08731
## 1st Qu.:-0.4978527 1st Qu.:-0.2328 1st Qu.:-0.51354 1st Qu.:-0.54550
## Median :-0.0003512 Median :-0.2107 Median :-0.10742 Median :-0.02978
## Mean : 0.0389010 Mean :-0.2000 Mean : 0.01773 Mean : 0.04621
## 3rd Qu.: 0.5568590 3rd Qu.:-0.1779 3rd Qu.: 0.17488 3rd Qu.: 0.39368
## Max. : 1.3770734 Max. :-0.1447 Max. : 3.94139 Max. : 2.35094
## NA's :43 NA's :1
## CaO Ti V Mn
## Min. :-1.56460 Min. :-1.17385 Min. :-0.98017 Min. :-1.00032
## 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.:-0.26027 1st Qu.:-0.50265
## Median : 0.07788 Median :-0.04881 Median : 0.08755 Median :-0.13876
## Mean : 0.06819 Mean : 0.04264 Mean : 0.08074 Mean : 0.03671
## 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.42728 3rd Qu.: 0.44988
## Max. : 1.91455 Max. : 2.05476 Max. : 1.57588 Max. : 1.93487
## NA's :28
## Fe Ni Cu Zn
## Min. :-1.10643 Min. :-1.10296 Min. :-1.50957 Min. :-1.03131
## 1st Qu.:-0.49989 1st Qu.:-0.44318 1st Qu.:-0.49095 1st Qu.:-0.61933
## Median :-0.09144 Median :-0.05642 Median : 0.18813 Median :-0.20735
## Mean : 0.04543 Mean : 0.01430 Mean : 0.04846 Mean : 0.02864
## 3rd Qu.: 0.46449 3rd Qu.: 0.53510 3rd Qu.: 0.64086 3rd Qu.: 0.29970
## Max. : 2.31412 Max. : 1.71814 Max. : 1.88584 Max. : 2.45465
## NA's :1
## As Rb Sr Y
## Min. :-0.8953 Min. :-1.28223 Min. :-1.85766 Min. :-1.16797
## 1st Qu.:-0.2961 1st Qu.:-0.54198 1st Qu.:-0.41969 1st Qu.:-0.41036
## Median : 0.2033 Median :-0.13073 Median : 0.10393 Median :-0.05384
## Mean : 0.2832 Mean : 0.04121 Mean : 0.07807 Mean : 0.04866
## 3rd Qu.: 0.6028 3rd Qu.: 0.68149 3rd Qu.: 0.51031 3rd Qu.: 0.39182
## Max. : 2.3007 Max. : 1.92553 Max. : 2.08896 Max. : 2.26357
## NA's :27 NA's :2
## Zr Nb
## Min. :-1.61460 Min. :-0.89312
## 1st Qu.:-0.60738 1st Qu.:-0.48298
## Median : 0.05199 Median :-0.07283
## Mean : 0.05398 Mean : 0.06816
## 3rd Qu.: 0.55041 3rd Qu.: 0.43985
## Max. : 2.11835 Max. : 2.18298
## NA's :31
# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)
# Visualize correlations between elements
cor(pxrf_1[3:12]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")
# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')
# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))
PCA with only the no-NA:s elements:
# Elements with no NA values at all
colnames(pxrf_no_na)
## [1] "Area" "Type" "Al2O3" "SiO2" "K2O" "CaO" "Ti" "Mn" "Fe"
## [10] "Cu" "Zn" "Rb" "Sr" "Zr"
# PCA
pca_1 <- prcomp(pxrf_1[3:12])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 1.9037 1.0026 0.8270 0.67213 0.37446 0.35729 0.29419
## Proportion of Variance 0.5809 0.1611 0.1096 0.07242 0.02248 0.02046 0.01387
## Cumulative Proportion 0.5809 0.7421 0.8517 0.92413 0.94661 0.96707 0.98095
## PC8 PC9 PC10
## Standard deviation 0.26346 0.16502 0.14906
## Proportion of Variance 0.01113 0.00437 0.00356
## Cumulative Proportion 0.99207 0.99644 1.00000
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")
autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by area")
autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam grouped by sample type")
PCA with all the feasible elements:
# Elements that have at least one viable value
colnames(pxrf_all)
## [1] "Area" "Type" "MgO" "Al2O3" "SiO2" "S" "Cl" "K2O" "CaO"
## [10] "Ti" "V" "Mn" "Fe" "Ni" "Cu" "Zn" "As" "Rb"
## [19] "Sr" "Y" "Zr" "Nb"
# PCA with NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0
pca_2 <- prcomp(pxrf_all[3:22])
summary(pca_2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1392 1.2623 1.1731 0.77694 0.75938 0.56778 0.50254
## Proportion of Variance 0.4455 0.1551 0.1340 0.05877 0.05614 0.03138 0.02459
## Cumulative Proportion 0.4455 0.6006 0.7346 0.79337 0.84951 0.88090 0.90548
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.44075 0.42941 0.3738 0.33174 0.29859 0.26048 0.24174
## Proportion of Variance 0.01891 0.01795 0.0136 0.01071 0.00868 0.00661 0.00569
## Cumulative Proportion 0.92439 0.94235 0.9559 0.96666 0.97534 0.98195 0.98764
## PC15 PC16 PC17 PC18 PC19 PC20
## Standard deviation 0.22407 0.16224 0.16055 0.12292 0.08709 0.04448
## Proportion of Variance 0.00489 0.00256 0.00251 0.00147 0.00074 0.00019
## Cumulative Proportion 0.99253 0.99509 0.99760 0.99907 0.99981 1.00000
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam more elements")
autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam more elements grouped by area")
autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE, main = "PCA Ashdod-Yam more elements grouped by sample type")
# HCA
# compute divisive hierarchical clustering
hc4 <- diana(pxrf_1[3:12])
# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.8007861
# Diana dendrogram
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")
# HCA dendrogam 1
dend <-
pxrf_1[3:12] %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
set("branches_k_color", k=5) %>%
set("branches_lwd", 1.2) %>%
set("labels_col", c("red", "black", "blue"), k=5) %>%
set("labels_cex", 1) %>%
set("leaves_pch", NA)
# plot the dend in usual "base" plotting engine:
plot(dend)
# HCA dendrogam 2:
HCA.ward5 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=2)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots
Preparing data
loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")
summary(loi)
## sample crucible wet_weight sample_wet
## Length:52 Min. :8.079 Min. :10.13 Min. :1.538
## Class :character 1st Qu.:8.566 1st Qu.:11.00 1st Qu.:2.464
## Mode :character Median :9.006 Median :11.71 Median :2.635
## Mean :8.994 Mean :11.55 Mean :2.559
## 3rd Qu.:9.435 3rd Qu.:12.05 3rd Qu.:2.824
## Max. :9.925 Max. :12.68 Max. :3.043
## dry_weight after_550_C after_950_C context
## Min. :10.11 Min. :10.08 Min. :10.05 Length:52
## 1st Qu.:10.97 1st Qu.:10.94 1st Qu.:10.86 Class :character
## Median :11.67 Median :11.62 Median :11.53 Mode :character
## Mean :11.52 Mean :11.47 Mean :11.41
## 3rd Qu.:12.00 3rd Qu.:11.96 3rd Qu.:11.93
## Max. :12.65 Max. :12.60 Max. :12.52
## type
## Length:52
## Class :character
## Mode :character
##
##
##
summary(tga)
## Name Analysis;Date Batch Wet
## Length:53 Length:53 Length:53 Length:53
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950
## Length:53 Length:53 Length:53
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
Manual LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
##
## $n
## [1] 52
##
## $conf
## [1] 1.840428 2.556598
##
## $out
## [1] 8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "AY-1" "AY-2" "AY-3" "AY-4" "AY-5" "AY-6" "AY-7"
## [8] "AY-8" "AY-9" "AY-10" "AY-11" "AY-12" "AY-13" "AY-14"
## [15] "AY-15" "AY-16" "AY-17" "AY-18" "AY-19" "AY-20" "AY-21"
## [22] "AY-22" "AY-23" "AY-24" "AY-25" "AY-26" "AY-27" "AY-28"
## [29] "AY-29" "AY-30" "AY-31" "AY-32" "AY-33" "AY-34" "AY-35"
## [36] "AY-36" "AY-37" "AY-38" "AY-39" "AY-40" "AY-41" "AY-42"
## [43] "AY-50" "AY-51" "AY-52" "AY-53" "AY-54" "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
loi <- subset(loi[1:47, ])
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(context))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
ggplot(loi,
aes(c550, c950)) +
geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(loi$c550)
barplot(loi$c950)
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
TGA LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.5855926 2.3958197 3.4621816 5.4590326
##
## $n
## [1] 50
##
## $conf
## [1] 1.976504 2.815135
##
## $out
## [1] 7.994718
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])
ggplot(tga,
aes(c550, c950, label = rownames(tga))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(tga$c550)
barplot(tga$c950)
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :37.56 Min. : 7.97 Min. :16.81
## 1st Qu.:42.13 1st Qu.: 9.72 1st Qu.:19.76
## Median :43.64 Median :10.38 Median :21.18
## Mean :43.92 Mean :10.49 Mean :21.22
## 3rd Qu.:45.91 3rd Qu.:11.29 3rd Qu.:22.34
## Max. :48.24 Max. :13.53 Max. :25.03
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
pxrf_no_na:
summary(pxrf_no_na)
## Area Type Al2O3 SiO2
## Area D Acropolis:19 mud mortar : 1 Min. :-1.20092 Min. :-1.3142038
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.49712 1st Qu.:-0.4978527
## Builder's floor : 5 mudbrick :35 Median :-0.23740 Median :-0.0003512
## Hellenistic : 1 PEM : 6 Mean : 0.04576 Mean : 0.0389010
## 3rd Qu.: 0.54615 3rd Qu.: 0.5568590
## Max. : 1.88920 Max. : 1.3770734
## K2O CaO Ti Mn
## Min. :-1.08731 Min. :-1.56460 Min. :-1.17385 Min. :-1.00032
## 1st Qu.:-0.54550 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.:-0.50265
## Median :-0.02978 Median : 0.07788 Median :-0.04881 Median :-0.13876
## Mean : 0.04621 Mean : 0.06819 Mean : 0.04264 Mean : 0.03671
## 3rd Qu.: 0.39368 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.44988
## Max. : 2.35094 Max. : 1.91455 Max. : 2.05476 Max. : 1.93487
## Fe Cu Zn Rb
## Min. :-1.10643 Min. :-1.50957 Min. :-1.03131 Min. :-1.28223
## 1st Qu.:-0.49989 1st Qu.:-0.49095 1st Qu.:-0.61933 1st Qu.:-0.54198
## Median :-0.09144 Median : 0.18813 Median :-0.20735 Median :-0.13073
## Mean : 0.04543 Mean : 0.04846 Mean : 0.02864 Mean : 0.04121
## 3rd Qu.: 0.46449 3rd Qu.: 0.64086 3rd Qu.: 0.29970 3rd Qu.: 0.68149
## Max. : 2.31412 Max. : 1.88584 Max. : 2.45465 Max. : 1.92553
## Sr Zr
## Min. :-1.85766 Min. :-1.61460
## 1st Qu.:-0.41969 1st Qu.:-0.60738
## Median : 0.10393 Median : 0.05199
## Mean : 0.07807 Mean : 0.05398
## 3rd Qu.: 0.51031 3rd Qu.: 0.55041
## Max. : 2.08896 Max. : 2.11835
pxrf_all:
summary(pxrf_all)
## Area Type MgO Al2O3
## Area D Acropolis:19 mud mortar : 1 Min. :-1.238209 Min. :-1.20092
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.422347 1st Qu.:-0.49712
## Builder's floor : 5 mudbrick :35 Median : 0.004755 Median :-0.23740
## Hellenistic : 1 PEM : 6 Mean : 0.038303 Mean : 0.04576
## 3rd Qu.: 0.429173 3rd Qu.: 0.54615
## Max. : 1.756566 Max. : 1.88920
## SiO2 S Cl K2O
## Min. :-1.3142038 Min. :-0.23396 Min. :-0.88004 Min. :-1.08731
## 1st Qu.:-0.4978527 1st Qu.: 0.00000 1st Qu.:-0.50859 1st Qu.:-0.54550
## Median :-0.0003512 Median : 0.00000 Median :-0.10495 Median :-0.02978
## Mean : 0.0389010 Mean :-0.01702 Mean : 0.01735 Mean : 0.04621
## 3rd Qu.: 0.5568590 3rd Qu.: 0.00000 3rd Qu.: 0.16497 3rd Qu.: 0.39368
## Max. : 1.3770734 Max. : 0.00000 Max. : 3.94139 Max. : 2.35094
## CaO Ti V Mn
## Min. :-1.56460 Min. :-1.17385 Min. :-0.98017 Min. :-1.00032
## 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.: 0.00000 1st Qu.:-0.50265
## Median : 0.07788 Median :-0.04881 Median : 0.00000 Median :-0.13876
## Mean : 0.06819 Mean : 0.04264 Mean : 0.03264 Mean : 0.03671
## 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.00000 3rd Qu.: 0.44988
## Max. : 1.91455 Max. : 2.05476 Max. : 1.57588 Max. : 1.93487
## Fe Ni Cu Zn
## Min. :-1.10643 Min. :-1.10296 Min. :-1.50957 Min. :-1.03131
## 1st Qu.:-0.49989 1st Qu.:-0.42043 1st Qu.:-0.49095 1st Qu.:-0.61933
## Median :-0.09144 Median :-0.01092 Median : 0.18813 Median :-0.20735
## Mean : 0.04543 Mean : 0.01400 Mean : 0.04846 Mean : 0.02864
## 3rd Qu.: 0.46449 3rd Qu.: 0.53510 3rd Qu.: 0.64086 3rd Qu.: 0.29970
## Max. : 2.31412 Max. : 1.71814 Max. : 1.88584 Max. : 2.45465
## As Rb Sr Y
## Min. :-0.8953 Min. :-1.28223 Min. :-1.85766 Min. :-1.16797
## 1st Qu.: 0.0000 1st Qu.:-0.54198 1st Qu.:-0.41969 1st Qu.:-0.41036
## Median : 0.0000 Median :-0.13073 Median : 0.10393 Median : 0.00000
## Mean : 0.1205 Mean : 0.04121 Mean : 0.07807 Mean : 0.04659
## 3rd Qu.: 0.0000 3rd Qu.: 0.68149 3rd Qu.: 0.51031 3rd Qu.: 0.39182
## Max. : 2.3007 Max. : 1.92553 Max. : 2.08896 Max. : 2.26357
## Zr Nb
## Min. :-1.61460 Min. :-0.8931
## 1st Qu.:-0.60738 1st Qu.: 0.0000
## Median : 0.05199 Median : 0.0000
## Mean : 0.05398 Mean : 0.0232
## 3rd Qu.: 0.55041 3rd Qu.: 0.0000
## Max. : 2.11835 Max. : 2.1830
PCA with only the no-NA:s elements:
PCA with all the feasible elements:
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots
Preparing data
loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")
summary(loi)
## sample crucible wet_weight sample_wet
## Length:7 Min. :8.382 Min. :10.92 Min. :2.257
## Class :character 1st Qu.:8.555 1st Qu.:11.39 1st Qu.:2.613
## Mode :character Median :8.666 Median :11.50 Median :2.880
## Mean :8.943 Mean :11.68 Mean :2.739
## 3rd Qu.:9.442 3rd Qu.:12.13 3rd Qu.:2.905
## Max. :9.560 Max. :12.31 Max. :3.002
## dry_weight after_550_C after_950_C
## Min. :10.83 Min. :10.62 Min. :10.50
## 1st Qu.:11.33 1st Qu.:11.25 1st Qu.:11.04
## Median :11.45 Median :11.39 Median :11.21
## Mean :11.63 Mean :11.55 Mean :11.24
## 3rd Qu.:12.11 3rd Qu.:12.05 3rd Qu.:11.43
## Max. :12.26 Max. :12.24 Max. :12.06
summary(tga)
## Name Analysis;Date Batch Wet
## Length:7 Min. :44536 Length:7 Length:7
## Class :character 1st Qu.:44536 Class :character Class :character
## Mode :character Median :44536 Mode :character Mode :character
## Mean :44536
## 3rd Qu.:44536
## Max. :44536
## Dry Mass_550 Mass_950
## Length:7 Length:7 Length:7
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
Manual LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(loi$c950)
## $stats
## [1] 1.087679 5.239954 6.463715 14.575647 15.544905
##
## $n
## [1] 7
##
## $conf
## [1] 0.8885902 12.0388404
##
## $out
## [1] 32.40429
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "AY-43" "AY-44" "AY-45" "AY-46" "AY-47" "AY-48" "AY-49"
loi <- subset(loi[1:47, ])
### ggplots later
barplot(loi$c550)
barplot(loi$c950)
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
TGA LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(tga$c950)
## $stats
## [1] 6.813924 8.278065 9.781651 11.887838 12.043832
##
## $n
## [1] 7
##
## $conf
## [1] 7.625953 11.937349
##
## $out
## [1] 29.72412
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])
ggplot(tga,
aes(c550, c950, label = rownames(tga))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(tga$c550)
barplot(tga$c950)
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :34.56 Min. : 2.480 Min. : 6.98
## 1st Qu.:38.93 1st Qu.: 5.215 1st Qu.:12.73
## Median :45.30 Median : 5.540 Median :13.28
## Mean :46.69 Mean : 6.211 Mean :14.56
## 3rd Qu.:50.95 3rd Qu.: 6.675 3rd Qu.:16.21
## Max. :67.20 Max. :11.680 Max. :23.75
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
pxrf_no_na:
summary(pxrf_no_na)
## Area Type Al2O3 SiO2
## Area D Acropolis:19 mud mortar : 1 Min. :-1.20092 Min. :-1.3142038
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.49712 1st Qu.:-0.4978527
## Builder's floor : 5 mudbrick :35 Median :-0.23740 Median :-0.0003512
## Hellenistic : 1 PEM : 6 Mean : 0.04576 Mean : 0.0389010
## 3rd Qu.: 0.54615 3rd Qu.: 0.5568590
## Max. : 1.88920 Max. : 1.3770734
## K2O CaO Ti Mn
## Min. :-1.08731 Min. :-1.56460 Min. :-1.17385 Min. :-1.00032
## 1st Qu.:-0.54550 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.:-0.50265
## Median :-0.02978 Median : 0.07788 Median :-0.04881 Median :-0.13876
## Mean : 0.04621 Mean : 0.06819 Mean : 0.04264 Mean : 0.03671
## 3rd Qu.: 0.39368 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.44988
## Max. : 2.35094 Max. : 1.91455 Max. : 2.05476 Max. : 1.93487
## Fe Cu Zn Rb
## Min. :-1.10643 Min. :-1.50957 Min. :-1.03131 Min. :-1.28223
## 1st Qu.:-0.49989 1st Qu.:-0.49095 1st Qu.:-0.61933 1st Qu.:-0.54198
## Median :-0.09144 Median : 0.18813 Median :-0.20735 Median :-0.13073
## Mean : 0.04543 Mean : 0.04846 Mean : 0.02864 Mean : 0.04121
## 3rd Qu.: 0.46449 3rd Qu.: 0.64086 3rd Qu.: 0.29970 3rd Qu.: 0.68149
## Max. : 2.31412 Max. : 1.88584 Max. : 2.45465 Max. : 1.92553
## Sr Zr
## Min. :-1.85766 Min. :-1.61460
## 1st Qu.:-0.41969 1st Qu.:-0.60738
## Median : 0.10393 Median : 0.05199
## Mean : 0.07807 Mean : 0.05398
## 3rd Qu.: 0.51031 3rd Qu.: 0.55041
## Max. : 2.08896 Max. : 2.11835
pxrf_all:
summary(pxrf_all)
## Area Type MgO Al2O3
## Area D Acropolis:19 mud mortar : 1 Min. :-1.238209 Min. :-1.20092
## Area D Wall :22 mud plaster: 5 1st Qu.:-0.422347 1st Qu.:-0.49712
## Builder's floor : 5 mudbrick :35 Median : 0.004755 Median :-0.23740
## Hellenistic : 1 PEM : 6 Mean : 0.038303 Mean : 0.04576
## 3rd Qu.: 0.429173 3rd Qu.: 0.54615
## Max. : 1.756566 Max. : 1.88920
## SiO2 S Cl K2O
## Min. :-1.3142038 Min. :-0.23396 Min. :-0.88004 Min. :-1.08731
## 1st Qu.:-0.4978527 1st Qu.: 0.00000 1st Qu.:-0.50859 1st Qu.:-0.54550
## Median :-0.0003512 Median : 0.00000 Median :-0.10495 Median :-0.02978
## Mean : 0.0389010 Mean :-0.01702 Mean : 0.01735 Mean : 0.04621
## 3rd Qu.: 0.5568590 3rd Qu.: 0.00000 3rd Qu.: 0.16497 3rd Qu.: 0.39368
## Max. : 1.3770734 Max. : 0.00000 Max. : 3.94139 Max. : 2.35094
## CaO Ti V Mn
## Min. :-1.56460 Min. :-1.17385 Min. :-0.98017 Min. :-1.00032
## 1st Qu.:-0.45677 1st Qu.:-0.47636 1st Qu.: 0.00000 1st Qu.:-0.50265
## Median : 0.07788 Median :-0.04881 Median : 0.00000 Median :-0.13876
## Mean : 0.06819 Mean : 0.04264 Mean : 0.03264 Mean : 0.03671
## 3rd Qu.: 0.64823 3rd Qu.: 0.53925 3rd Qu.: 0.00000 3rd Qu.: 0.44988
## Max. : 1.91455 Max. : 2.05476 Max. : 1.57588 Max. : 1.93487
## Fe Ni Cu Zn
## Min. :-1.10643 Min. :-1.10296 Min. :-1.50957 Min. :-1.03131
## 1st Qu.:-0.49989 1st Qu.:-0.42043 1st Qu.:-0.49095 1st Qu.:-0.61933
## Median :-0.09144 Median :-0.01092 Median : 0.18813 Median :-0.20735
## Mean : 0.04543 Mean : 0.01400 Mean : 0.04846 Mean : 0.02864
## 3rd Qu.: 0.46449 3rd Qu.: 0.53510 3rd Qu.: 0.64086 3rd Qu.: 0.29970
## Max. : 2.31412 Max. : 1.71814 Max. : 1.88584 Max. : 2.45465
## As Rb Sr Y
## Min. :-0.8953 Min. :-1.28223 Min. :-1.85766 Min. :-1.16797
## 1st Qu.: 0.0000 1st Qu.:-0.54198 1st Qu.:-0.41969 1st Qu.:-0.41036
## Median : 0.0000 Median :-0.13073 Median : 0.10393 Median : 0.00000
## Mean : 0.1205 Mean : 0.04121 Mean : 0.07807 Mean : 0.04659
## 3rd Qu.: 0.0000 3rd Qu.: 0.68149 3rd Qu.: 0.51031 3rd Qu.: 0.39182
## Max. : 2.3007 Max. : 1.92553 Max. : 2.08896 Max. : 2.26357
## Zr Nb
## Min. :-1.61460 Min. :-0.8931
## 1st Qu.:-0.60738 1st Qu.: 0.0000
## Median : 0.05199 Median : 0.0000
## Mean : 0.05398 Mean : 0.0232
## 3rd Qu.: 0.55041 3rd Qu.: 0.0000
## Max. : 2.11835 Max. : 2.1830
PCA with only the no-NA:s elements:
PCA with all the feasible elements:
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots
Preparing data
loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")
summary(loi)
## sample crucible wet_weight sample_wet
## Length:78 Min. :7.921 Min. :10.13 Min. :1.538
## Class :character 1st Qu.:8.566 1st Qu.:11.18 1st Qu.:2.448
## Mode :character Median :9.006 Median :11.71 Median :2.635
## Mean :8.995 Mean :11.58 Mean :2.588
## 3rd Qu.:9.449 3rd Qu.:12.05 3rd Qu.:2.845
## Max. :9.925 Max. :12.68 Max. :3.231
## dry_weight after_550_C after_950_C
## Min. :10.11 Min. :10.08 Min. : 9.773
## 1st Qu.:11.17 1st Qu.:11.12 1st Qu.:10.820
## Median :11.67 Median :11.62 Median :11.302
## Mean :11.55 Mean :11.49 Mean :11.267
## 3rd Qu.:12.00 3rd Qu.:11.95 3rd Qu.:11.781
## Max. :12.65 Max. :12.60 Max. :12.524
summary(tga)
## Name Analysis;Date Batch Wet
## Length:72 Length:72 Length:72 Length:72
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950
## Length:72 Length:72 Length:72
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
Manual LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.912342 3.049426 9.981413 19.321298
##
## $n
## [1] 78
##
## $conf
## [1] 1.605872 4.492981
##
## $out
## [1] 29.25001 40.50466 33.45907 42.03850 42.60863 29.14914 29.25505 27.79538
## [9] 33.34575 40.99217 26.13474 41.29933 32.40429
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "AH-1" "AH-2" "AH-3" "AH-4" "AH-5" "AH-6" "AH-7"
## [8] "AH-8" "AH-9" "TI-1" "TI-2" "TI-3" "TI-4" "TI-5"
## [15] "TI-6" "TI-7" "TI-8" "TI-10" "RLZ-1" "AY-43" "AY-44"
## [22] "AY-45" "AY-46" "AY-47" "AY-48" "AY-49" "AY-1" "AY-2"
## [29] "AY-3" "AY-4" "AY-5" "AY-6" "AY-7" "AY-8" "AY-9"
## [36] "AY-10" "AY-11" "AY-12" "AY-13" "AY-14" "AY-15" "AY-16"
## [43] "AY-17" "AY-18" "AY-19" "AY-20" "AY-21" "AY-22" "AY-23"
## [50] "AY-24" "AY-25" "AY-26" "AY-27" "AY-28" "AY-29" "AY-30"
## [57] "AY-31" "AY-32" "AY-33" "AY-34" "AY-35" "AY-36" "AY-37"
## [64] "AY-38" "AY-39" "AY-40" "AY-41" "AY-42" "AY-50" "AY-51"
## [71] "AY-52" "AY-53" "AY-54" "AY-50_2" "AY-51_2" "AY-52_2" "AY-53_2"
## [78] "AY-54_2"
loi <- subset(loi[1:72, ])
### add ggplots later with context and type
barplot(loi$c550)
barplot(loi$c950)
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
TGA LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.6976633 3.2631063 5.4590326 9.7816508
##
## $n
## [1] 69
##
## $conf
## [1] 2.547658 3.978555
##
## $out
## [1] 12.04383 11.73184 29.72412 42.03111 42.69172 30.94577 30.11137 28.19411
## [9] 34.00000 41.45557 29.66118 41.94993
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])
ggplot(tga,
aes(c550, c950, label = rownames(tga))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(tga$c550)
barplot(tga$c950)
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :43.75 Min. : 3.160 Min. :13.85
## 1st Qu.:56.08 1st Qu.: 3.770 1st Qu.:14.71
## Median :64.30 Median : 4.350 Median :15.85
## Mean :63.19 Mean : 6.649 Mean :17.27
## 3rd Qu.:71.51 3rd Qu.: 6.280 3rd Qu.:17.70
## Max. :75.34 Max. :19.530 Max. :25.84
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggfortify);
library(pca3d);
library(FactoMineR); # fast PCA graphs
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(tidyverse) # rectangles around HClust
# Read data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2",
"S", "Cl", "K2O", "CaO", "Ti", "V", "Cr",
"Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As",
"Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo",
"Rh", "Pd", "Ag", "Cd", "Sn", "Sb",
"La", "Ce", "Hf", "Ta", "W", "Pt", "Au",
"Hg", "Tl", "Pb", "Bi", "Th", "U")
pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)
# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(pxrf_NA, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"
# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)
# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)
# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)
# Datasets with only our new mudbrick samples (no soil or previously published ones)
MB_nona <- pxrf_no_na[c(1:11), ]
MB_all <- pxrf_all[c(1:11), ]
# Removing As, Nb, Rh, Ag and Pb as they have almost no viable measurement values
MB_all <- MB_all %>%
select(-As) %>%
select(-Nb) %>%
select(-Rh) %>%
select(-Ag) %>%
select(-Pb)
pxrf_no_na:
summary(pxrf_no_na)
## Area Type Ti Mn Fe
## new :11 mudbrick:45 Min. :0.0969 Min. :0.01190 Min. :0.9617
## old :34 soil : 8 1st Qu.:0.1814 1st Qu.:0.03893 1st Qu.:2.2241
## soil: 8 Median :0.2118 Median :0.04670 Median :2.7214
## Mean :0.2180 Mean :0.04549 Mean :2.8091
## 3rd Qu.:0.2594 3rd Qu.:0.05440 3rd Qu.:3.3530
## Max. :0.3571 Max. :0.06800 Max. :4.7754
## Ni Cu Zr
## Min. :0.000400 Min. :0.003600 Min. :0.004300
## 1st Qu.:0.002200 1st Qu.:0.005933 1st Qu.:0.007800
## Median :0.003000 Median :0.006500 Median :0.008800
## Mean :0.002856 Mean :0.006761 Mean :0.008810
## 3rd Qu.:0.003500 3rd Qu.:0.007400 3rd Qu.:0.009667
## Max. :0.005900 Max. :0.011333 Max. :0.012200
pxrf_all:
summary(pxrf_all)
## Area Type MgO Al2O3 SiO2
## new :11 mudbrick:45 Min. :2.919 Min. :2.182 Min. : 9.164
## old :34 soil : 8 1st Qu.:3.369 1st Qu.:3.403 1st Qu.:14.598
## soil: 8 Median :4.088 Median :4.317 Median :17.087
## Mean :4.011 Mean :4.168 Mean :16.502
## 3rd Qu.:4.499 3rd Qu.:4.702 3rd Qu.:18.560
## Max. :5.125 Max. :5.902 Max. :22.259
## NA's :34 NA's :34 NA's :34
## S Cl K2O CaO
## Min. :0.04373 Min. :0.01113 Min. :0.1980 Min. :11.28
## 1st Qu.:0.10686 1st Qu.:0.02597 1st Qu.:0.3599 1st Qu.:14.53
## Median :0.21532 Median :0.05117 Median :0.4238 Median :18.05
## Mean :0.25538 Mean :0.13135 Mean :0.4340 Mean :17.94
## 3rd Qu.:0.32239 3rd Qu.:0.10892 3rd Qu.:0.4795 3rd Qu.:20.82
## Max. :0.74597 Max. :0.93440 Max. :0.6584 Max. :25.59
## NA's :35 NA's :34 NA's :36 NA's :34
## Ti V Mn Fe
## Min. :0.0969 Min. :0.00267 Min. :0.01190 Min. :0.9617
## 1st Qu.:0.1814 1st Qu.:0.00334 1st Qu.:0.03893 1st Qu.:2.2241
## Median :0.2118 Median :0.00420 Median :0.04670 Median :2.7214
## Mean :0.2180 Mean :0.00425 Mean :0.04549 Mean :2.8091
## 3rd Qu.:0.2594 3rd Qu.:0.00512 3rd Qu.:0.05440 3rd Qu.:3.3530
## Max. :0.3571 Max. :0.00587 Max. :0.06800 Max. :4.7754
## NA's :41
## Ni Cu Zn As
## Min. :0.000400 Min. :0.003600 Min. :0.00167 Min. :0.00033
## 1st Qu.:0.002200 1st Qu.:0.005933 1st Qu.:0.00308 1st Qu.:0.00033
## Median :0.003000 Median :0.006500 Median :0.00337 Median :0.00040
## Mean :0.002856 Mean :0.006761 Mean :0.00335 Mean :0.00040
## 3rd Qu.:0.003500 3rd Qu.:0.007400 3rd Qu.:0.00382 3rd Qu.:0.00047
## Max. :0.005900 Max. :0.011333 Max. :0.00500 Max. :0.00047
## NA's :34 NA's :48
## Rb Sr Y Zr
## Min. :0.00120 Min. :0.01790 Min. :0.00087 Min. :0.004300
## 1st Qu.:0.00173 1st Qu.:0.02893 1st Qu.:0.00137 1st Qu.:0.007800
## Median :0.00203 Median :0.03403 Median :0.00160 Median :0.008800
## Mean :0.00204 Mean :0.03402 Mean :0.00161 Mean :0.008810
## 3rd Qu.:0.00213 3rd Qu.:0.03835 3rd Qu.:0.00173 3rd Qu.:0.009667
## Max. :0.00357 Max. :0.04840 Max. :0.00260 Max. :0.012200
## NA's :35 NA's :34 NA's :36
## Nb Rh Ag Pb
## Min. :0.00063 Min. :0.01570 Min. :0.00230 Min. :0.002
## 1st Qu.:0.00063 1st Qu.:0.01683 1st Qu.:0.00282 1st Qu.:0.002
## Median :0.00063 Median :0.01797 Median :0.00322 Median :0.002
## Mean :0.00063 Mean :0.01797 Mean :0.00330 Mean :0.002
## 3rd Qu.:0.00063 3rd Qu.:0.01910 3rd Qu.:0.00369 3rd Qu.:0.002
## Max. :0.00063 Max. :0.02023 Max. :0.00447 Max. :0.002
## NA's :52 NA's :51 NA's :49 NA's :52
MB_nona:
summary(MB_nona)
## Area Type Ti Mn Fe
## new :11 mudbrick:11 Min. :0.0969 Min. :0.01190 Min. :0.9617
## old : 0 soil : 0 1st Qu.:0.1663 1st Qu.:0.02397 1st Qu.:1.6270
## soil: 0 Median :0.1915 Median :0.03347 Median :2.1312
## Mean :0.1780 Mean :0.03070 Mean :1.8912
## 3rd Qu.:0.1941 3rd Qu.:0.03895 3rd Qu.:2.2622
## Max. :0.2295 Max. :0.04177 Max. :2.4481
## Ni Cu Zr
## Min. :0.001400 Min. :0.004500 Min. :0.005400
## 1st Qu.:0.002317 1st Qu.:0.005533 1st Qu.:0.007767
## Median :0.003000 Median :0.006700 Median :0.008733
## Mean :0.002812 Mean :0.007333 Mean :0.008494
## 3rd Qu.:0.003367 3rd Qu.:0.008717 3rd Qu.:0.009100
## Max. :0.003967 Max. :0.011333 Max. :0.011833
MB_all:
summary(MB_all)
## Area Type MgO Al2O3 SiO2
## new :11 mudbrick:11 Min. :2.919 Min. :2.182 Min. : 9.164
## old : 0 soil : 0 1st Qu.:3.531 1st Qu.:3.403 1st Qu.:13.366
## soil: 0 Median :4.088 Median :4.317 Median :17.705
## Mean :3.999 Mean :3.941 Mean :15.874
## 3rd Qu.:4.400 3rd Qu.:4.593 3rd Qu.:18.560
## Max. :5.125 Max. :5.270 Max. :20.334
##
## S Cl K2O CaO
## Min. :0.1622 Min. :0.02723 Min. :0.1980 Min. :11.28
## 1st Qu.:0.2737 1st Qu.:0.06783 1st Qu.:0.3920 1st Qu.:18.46
## Median :0.3221 Median :0.09397 Median :0.4376 Median :20.74
## Mean :0.3550 Mean :0.20759 Mean :0.4312 Mean :19.67
## 3rd Qu.:0.3809 3rd Qu.:0.23782 3rd Qu.:0.4757 3rd Qu.:22.12
## Max. :0.7460 Max. :0.93440 Max. :0.6052 Max. :25.59
## NA's :1
## Ti V Mn Fe
## Min. :0.0969 Min. :0.002667 Min. :0.01190 Min. :0.9617
## 1st Qu.:0.1663 1st Qu.:0.004317 1st Qu.:0.02397 1st Qu.:1.6270
## Median :0.1915 Median :0.004750 Median :0.03347 Median :2.1312
## Mean :0.1780 Mean :0.004721 Mean :0.03070 Mean :1.8912
## 3rd Qu.:0.1941 3rd Qu.:0.005633 3rd Qu.:0.03895 3rd Qu.:2.2622
## Max. :0.2295 Max. :0.005867 Max. :0.04177 Max. :2.4481
## NA's :3
## Ni Cu Zn Rb
## Min. :0.001400 Min. :0.004500 Min. :0.001667 Min. :0.001200
## 1st Qu.:0.002317 1st Qu.:0.005533 1st Qu.:0.002467 1st Qu.:0.001617
## Median :0.003000 Median :0.006700 Median :0.003233 Median :0.001767
## Mean :0.002812 Mean :0.007333 Mean :0.003170 Mean :0.001750
## 3rd Qu.:0.003367 3rd Qu.:0.008717 3rd Qu.:0.003800 3rd Qu.:0.001925
## Max. :0.003967 Max. :0.011333 Max. :0.005000 Max. :0.002133
## NA's :1
## Sr Y Zr
## Min. :0.01790 Min. :0.0008667 Min. :0.005400
## 1st Qu.:0.02893 1st Qu.:0.0013333 1st Qu.:0.007767
## Median :0.03507 Median :0.0014000 Median :0.008733
## Mean :0.03438 Mean :0.0013741 Mean :0.008494
## 3rd Qu.:0.03835 3rd Qu.:0.0014667 3rd Qu.:0.009100
## Max. :0.04840 Max. :0.0017333 Max. :0.011833
## NA's :2
Only the “new” mudbrick samples
# Visualize correlations between elements
cor(MB_all[3:20]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")
# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(MB_nona[3:8], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')
# K-means clustering in ggpairs: 2 clusters
km <-kmeans(MB_nona[3:8], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(MB_nona[3:8], mapping = aes(col = cluster_x))
PCA with only the no-NA:s elements:
# PCA for only non-NA elements
colnames(MB_nona)
## [1] "Area" "Type" "Ti" "Mn" "Fe" "Ni" "Cu" "Zr"
pca_1 <- prcomp(MB_nona[3:8], scale = TRUE, center = TRUE)
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6
## Standard deviation 2.0136 1.0789 0.70395 0.45806 0.25323 0.10779
## Proportion of Variance 0.6758 0.1940 0.08259 0.03497 0.01069 0.00194
## Cumulative Proportion 0.6758 0.8698 0.95241 0.98738 0.99806 1.00000
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))
autoplot(pca_1, data=MB_nona, colour="red", shape = FALSE, label = TRUE, main = "PCA Palaepaphos 1: no NA:s at all")
PCA(MB_nona[3:8])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 6 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
PCA with all the feasible elements:
# NA:s converted to zeros
MB_all[is.na(MB_all)] <- 0
colnames(MB_all)
## [1] "Area" "Type" "MgO" "Al2O3" "SiO2" "S" "Cl" "K2O" "CaO"
## [10] "Ti" "V" "Mn" "Fe" "Ni" "Cu" "Zn" "Rb" "Sr"
## [19] "Y" "Zr"
pca_2 <- prcomp((MB_all[3:20]), scale = TRUE, center = TRUE)
summary(pca_2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.1620 1.8251 1.26077 1.10902 0.8950 0.63845 0.5612
## Proportion of Variance 0.5554 0.1851 0.08831 0.06833 0.0445 0.02265 0.0175
## Cumulative Proportion 0.5554 0.7405 0.82882 0.89715 0.9416 0.96429 0.9818
## PC8 PC9 PC10 PC11
## Standard deviation 0.4388 0.34174 0.13583 2.812e-16
## Proportion of Variance 0.0107 0.00649 0.00103 0.000e+00
## Cumulative Proportion 0.9925 0.99897 1.00000 1.000e+00
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))
autoplot(pca_2, data=MB_all, colour="red", shape = FALSE, label = TRUE, main = "PCA Palaepaphos 2: NA:s permitted")
PCA(MB_all[3:20])
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 18 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
Including the previously published ones, No NA values permitted
# PCA for only non-NA elements
colnames(pxrf_no_na)
## [1] "Area" "Type" "Ti" "Mn" "Fe" "Ni" "Cu" "Zr"
pca_3 <- prcomp(na.omit(pxrf_no_na[3:6]), scale = TRUE, center = TRUE)
summary(pca_3)
## Importance of components:
## PC1 PC2 PC3 PC4
## Standard deviation 1.6514 0.9995 0.41614 0.31724
## Proportion of Variance 0.6818 0.2498 0.04329 0.02516
## Cumulative Proportion 0.6818 0.9315 0.97484 1.00000
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))
autoplot(pca_3, data=pxrf_no_na, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
autoplot(pca_3, data=pxrf_no_na, colour='Type', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by sample type")
Including the previously published ones, NA values are permitted This is not viable, as the difference in what values are available is too different between old and new samples.
# NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0
colnames(pxrf_all)
## [1] "Area" "Type" "MgO" "Al2O3" "SiO2" "S" "Cl" "K2O" "CaO"
## [10] "Ti" "V" "Mn" "Fe" "Ni" "Cu" "Zn" "As" "Rb"
## [19] "Sr" "Y" "Zr" "Nb" "Rh" "Ag" "Pb"
pca_4 <- prcomp(pxrf_all[3:25], scale = TRUE, center = TRUE)
summary(pca_4)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 3.3748 1.8765 1.50629 1.13013 1.04455 0.89816 0.83735
## Proportion of Variance 0.4952 0.1531 0.09865 0.05553 0.04744 0.03507 0.03049
## Cumulative Proportion 0.4952 0.6483 0.74692 0.80245 0.84989 0.88497 0.91545
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Standard deviation 0.77794 0.66209 0.53427 0.40495 0.35837 0.33354 0.27679
## Proportion of Variance 0.02631 0.01906 0.01241 0.00713 0.00558 0.00484 0.00333
## Cumulative Proportion 0.94176 0.96082 0.97323 0.98036 0.98595 0.99078 0.99412
## PC15 PC16 PC17 PC18 PC19 PC20 PC21
## Standard deviation 0.22788 0.17584 0.14605 0.12032 0.10296 0.06388 0.04325
## Proportion of Variance 0.00226 0.00134 0.00093 0.00063 0.00046 0.00018 0.00008
## Cumulative Proportion 0.99637 0.99772 0.99865 0.99927 0.99974 0.99991 0.99999
## PC22 PC23
## Standard deviation 0.01157 2.376e-16
## Proportion of Variance 0.00001 0.000e+00
## Cumulative Proportion 1.00000 1.000e+00
biplot(pca_4, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))
autoplot(pca_4, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by area")
autoplot(pca_4, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE, main = "PCA Palaepaphos grouped by sample type")
HCA including only new mudbrick samples:
# HCA dendrogam 1
dend <-
MB_all[3:20] %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
set("branches_k_color", k=5) %>%
set("branches_lwd", 1.2) %>%
set("labels_col", c("red", "black", "blue"), k=5) %>%
set("labels_cex", 1) %>%
set("leaves_pch", NA)
# plot the dend in usual "base" plotting engine:
plot(dend)
# HCA dendrogam 2
HCA.ward5 <- hclust(dist(MB_all[3:20], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)
# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(MB_all[3:20])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")
# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.7894269
HCA including all mudbrick samples, only no NA:s elements :
# HCA dendrogam 1
dend <-
pxrf_no_na[1:45, 3:8] %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
set("branches_k_color", k=5) %>%
set("branches_lwd", 1.2) %>%
set("labels_col", c("red", "black", "blue"), k=5) %>%
set("labels_cex", 1) %>%
set("leaves_pch", NA)
# plot the dend in usual "base" plotting engine:
plot(dend)
# HCA dendrogam 2
HCA.ward5 <- hclust(dist(pxrf_no_na[1:45, 3:8], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)
# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(pxrf_no_na[3:8])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")
# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.9804178
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]
# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(MB_grain)), size=3)
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_nomask() +
geom_mask() +
geom_point(size=2)
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots
Preparing data
loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")
summary(loi)
## type sample crucible wet;weight
## Length:27 Length:27 Min. :8.100 Min. : 9.637
## Class :character Class :character 1st Qu.:8.639 1st Qu.:10.859
## Mode :character Mode :character Median :8.997 Median :11.184
## Mean :8.988 Mean :11.274
## 3rd Qu.:9.454 3rd Qu.:11.844
## Max. :9.560 Max. :12.301
## sample;wet dry_weight after_550_C after_950_C
## Min. :1.537 Min. : 9.595 Min. : 9.484 Min. : 9.23
## 1st Qu.:2.193 1st Qu.:10.813 1st Qu.:10.721 1st Qu.:10.27
## Median :2.371 Median :11.132 Median :11.019 Median :10.59
## Mean :2.286 Mean :11.216 Mean :11.113 Mean :10.67
## 3rd Qu.:2.453 3rd Qu.:11.777 3rd Qu.:11.681 3rd Qu.:11.18
## Max. :2.785 Max. :12.218 Max. :12.076 Max. :11.56
summary(tga)
## Name Analysis;Date Batch Wet
## Length:30 Length:30 Length:30 Length:30
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950
## Length:30 Length:30 Length:30
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
Manual LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(loi$c950)
## $stats
## [1] 14.55842 18.28992 20.42575 21.68496 25.15246
##
## $n
## [1] 27
##
## $conf
## [1] 19.39341 21.45808
##
## $out
## [1] 33.21076 33.78112
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "PP-1" "PP-2" "PP-3" "PP-4" "PP-5" "PP-6" "PP-7" "PP-8"
## [9] "PP-9" "PP-10" "PP-11" "PP-12" "PP-13" "PP-14" "PP-15" "PP-16"
## [17] "PP-17" "PP-18" "PP-19" "PP-1_2" "PP-2_2" "PP-3_2" "PP-4_2" "PP-5_2"
## [25] "PP-6_2" "PP-7_2" "PP-8_2"
loi <- subset(loi[1:19, ])
ggplot(loi,
aes(c550, c950, label = rownames(loi), colour = factor(type))) +
geom_point(size=2, aes(shape = factor(type))) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(loi$c550)
barplot(loi$c950)
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
TGA LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(tga$c950)
## $stats
## [1] 14.54839 18.93682 20.82648 22.49982 24.52314
##
## $n
## [1] 28
##
## $conf
## [1] 19.76259 21.89036
##
## $out
## [1] 31.05334 33.79251
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])
ggplot(tga,
aes(c550, c950, label = rownames(tga))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(tga$c550)
barplot(tga$c950)
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :62.38 Min. : 7.27 Min. :19.90
## 1st Qu.:63.47 1st Qu.: 8.50 1st Qu.:21.36
## Median :64.19 Median : 9.34 Median :21.72
## Mean :65.50 Mean : 9.29 Mean :21.92
## 3rd Qu.:65.78 3rd Qu.:10.56 3rd Qu.:22.96
## Max. :72.00 Max. :10.90 Max. :23.90
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
Intro about the site
From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:
Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)
The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.
# Libraries
library(openxlsx);
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")
# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))
# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2",
"K2O", "CaO", "Ti", "V", "Cr",
"Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As",
"Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo",
"Rh", "Pd", "Ag", "Cd", "Sn", "Sb",
"La", "Ce", "Hf", "Ta", "W", "Pt", "Au",
"Hg", "Tl", "Pb", "Bi", "Th", "U")
pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)
# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))
# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)
# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"
# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)
# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)
# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)
pxrf_no_na:
summary(pxrf_no_na)
## Area Type MgO Al2O3
## Hill II :3 mudbrick:10 Min. :-0.7957 Min. :-1.47906
## Phase I :2 1st Qu.:-0.4773 1st Qu.:-0.41015
## Phase II :2 Median :-0.2741 Median : 0.05631
## Phase II or III:2 Mean : 0.0000 Mean : 0.00000
## Urartian silo :1 3rd Qu.: 0.3559 3rd Qu.: 0.45276
## Max. : 1.9131 Max. : 1.38944
## SiO2 K2O CaO Ti
## Min. :-1.6654 Min. :-1.55669 Min. :-1.4333259 Min. :-1.87044
## 1st Qu.:-0.3827 1st Qu.:-0.68787 1st Qu.:-0.6680205 1st Qu.:-0.31750
## Median : 0.3100 Median : 0.03848 Median :-0.0001026 Median : 0.02437
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000000 Mean : 0.00000
## 3rd Qu.: 0.6156 3rd Qu.: 0.75515 3rd Qu.: 0.7641718 3rd Qu.: 0.81009
## Max. : 0.8556 Max. : 1.11418 Max. : 1.2091486 Max. : 0.86883
## Mn Fe Ni Cu
## Min. :-1.03797 Min. :-0.73814 Min. :-0.94570 Min. :-1.2582
## 1st Qu.:-0.30897 1st Qu.:-0.28874 1st Qu.:-0.66424 1st Qu.:-0.5607
## Median :-0.09355 Median :-0.08696 Median :-0.07318 Median : 0.1094
## Mean : 0.00000 Mean : 0.00000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.21238 3rd Qu.: 0.08823 3rd Qu.: 0.45456 3rd Qu.: 0.3966
## Max. : 1.57654 Max. : 1.60860 Max. : 1.50299 Max. : 1.5865
## Zn Rb Sr Y
## Min. :-0.8847 Min. :-1.2036 Min. :-1.4059 Min. :-1.0149
## 1st Qu.:-0.3508 1st Qu.:-0.4637 1st Qu.:-0.3803 1st Qu.:-0.3542
## Median :-0.0839 Median :-0.2006 Median :-0.1555 Median :-0.1163
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2720 3rd Qu.: 0.3256 3rd Qu.: 0.4915 3rd Qu.: 0.1216
## Max. : 1.5813 Max. : 1.4601 Max. : 1.4314 Max. : 1.7338
## Zr
## Min. :-1.1158
## 1st Qu.:-0.5166
## Median :-0.3001
## Mean : 0.0000
## 3rd Qu.: 0.1279
## Max. : 1.9054
pxrf_all:
summary(pxrf_all)
## Area Type MgO Al2O3
## Hill II :3 mudbrick:10 Min. :-0.7957 Min. :-1.47906
## Phase I :2 1st Qu.:-0.4773 1st Qu.:-0.41015
## Phase II :2 Median :-0.2741 Median : 0.05631
## Phase II or III:2 Mean : 0.0000 Mean : 0.00000
## Urartian silo :1 3rd Qu.: 0.3559 3rd Qu.: 0.45276
## Max. : 1.9131 Max. : 1.38944
##
## SiO2 K2O CaO Ti
## Min. :-1.6654 Min. :-1.55669 Min. :-1.4333259 Min. :-1.87044
## 1st Qu.:-0.3827 1st Qu.:-0.68787 1st Qu.:-0.6680205 1st Qu.:-0.31750
## Median : 0.3100 Median : 0.03848 Median :-0.0001026 Median : 0.02437
## Mean : 0.0000 Mean : 0.00000 Mean : 0.0000000 Mean : 0.00000
## 3rd Qu.: 0.6156 3rd Qu.: 0.75515 3rd Qu.: 0.7641718 3rd Qu.: 0.81009
## Max. : 0.8556 Max. : 1.11418 Max. : 1.2091486 Max. : 0.86883
##
## V Cr Mn Fe
## Min. :-0.46003 Min. :-0.686179 Min. :-1.03797 Min. :-0.73814
## 1st Qu.:-0.43566 1st Qu.:-0.459188 1st Qu.:-0.30897 1st Qu.:-0.28874
## Median :-0.19193 Median :-0.232198 Median :-0.09355 Median :-0.08696
## Mean :-0.09932 Mean :-0.232198 Mean : 0.00000 Mean : 0.00000
## 3rd Qu.: 0.02742 3rd Qu.:-0.005208 3rd Qu.: 0.21238 3rd Qu.: 0.08823
## Max. : 0.56361 Max. : 0.221783 Max. : 1.57654 Max. : 1.60860
## NA's :5 NA's :8
## Ni Cu Zn As
## Min. :-0.94570 Min. :-1.2582 Min. :-0.8847 Min. :-1.03932
## 1st Qu.:-0.66424 1st Qu.:-0.5607 1st Qu.:-0.3508 1st Qu.:-0.21581
## Median :-0.07318 Median : 0.1094 Median :-0.0839 Median :-0.05111
## Mean : 0.00000 Mean : 0.0000 Mean : 0.0000 Mean : 0.05869
## 3rd Qu.: 0.45456 3rd Qu.: 0.3966 3rd Qu.: 0.2720 3rd Qu.: 0.44299
## Max. : 1.50299 Max. : 1.5865 Max. : 1.5813 Max. : 1.10179
## NA's :1
## Rb Sr Y Zr
## Min. :-1.2036 Min. :-1.4059 Min. :-1.0149 Min. :-1.1158
## 1st Qu.:-0.4637 1st Qu.:-0.3803 1st Qu.:-0.3542 1st Qu.:-0.5166
## Median :-0.2006 Median :-0.1555 Median :-0.1163 Median :-0.3001
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.3256 3rd Qu.: 0.4915 3rd Qu.: 0.1216 3rd Qu.: 0.1279
## Max. : 1.4601 Max. : 1.4314 Max. : 1.7338 Max. : 1.9054
##
## Nb
## Min. :-0.65796
## 1st Qu.:-0.50862
## Median :-0.06060
## Mean :-0.09379
## 3rd Qu.:-0.06060
## Max. : 0.93500
## NA's :4
# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)
# Visualize correlations between elements
cor(pxrf_1[3:15]) %>% corrplot (type = "lower", tl.cex = 1, tl.pos="d", cl.pos="r")
# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')
# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))
PCA with only the no-NA:s elements:
# PCA
pca_1 <- prcomp(pxrf_1[3:15])
summary(pca_1)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.4147 1.0889 0.9306 0.48309 0.37987 0.31662 0.24466
## Proportion of Variance 0.6901 0.1403 0.1025 0.02762 0.01708 0.01186 0.00708
## Cumulative Proportion 0.6901 0.8305 0.9330 0.96059 0.97767 0.98953 0.99662
## PC8 PC9 PC10
## Standard deviation 0.15630 0.06455 7.266e-17
## Proportion of Variance 0.00289 0.00049 0.000e+00
## Cumulative Proportion 0.99951 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")
autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE, main = "PCA grouped by area")
PCA with all the feasible elements:
# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0
pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.6421 1.2026 0.98838 0.84185 0.45012 0.39002 0.30457
## Proportion of Variance 0.6529 0.1353 0.09136 0.06628 0.01895 0.01423 0.00868
## Cumulative Proportion 0.6529 0.7881 0.87946 0.94574 0.96469 0.97891 0.98759
## PC8 PC9 PC10
## Standard deviation 0.28180 0.23084 2.238e-16
## Proportion of Variance 0.00743 0.00498 0.000e+00
## Cumulative Proportion 0.99502 1.00000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")
autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE, main = "PCA more elements grouped by area")
# HCA
# HCA dendrogam 1
dend <-
pxrf_1[3:12] %>% # data
dist %>% # calculate a distance matrix
hclust(method = "ward.D2") %>% # hierarchical clustering
as.dendrogram %>% # turn the object into a dendrogram
set("branches_k_color", k=4) %>%
set("branches_lwd", 1.2) %>%
set("labels_col", c("red", "black", "blue"), k=4) %>%
set("labels_cex", 1) %>%
set("leaves_pch", NA)
# plot the dend in usual "base" plotting engine:
plot(dend)
# HCA dendrogam 2:
HCA.ward4 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)
# Libraries
library(dplyr);
library(openxlsx);
library(ggtern)
# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")
# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>%
filter(!grepl('Average', Sample)) %>%
filter(!grepl('test', Sample))
# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)
# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)
# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"
# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}
# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))
# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
labs(title="Clay silt sand") +
theme_rgbw() +
theme_nomask() +
geom_point(size=0) +
geom_text(aes(label=rownames(grain_01)), size=3)
We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.
# Libraries
library(dplyr);
library(GGally);
library(corrplot);
library(tidyr);
library(openxlsx);
library(cluster);
library(ggplot2);
library(ggrepel);
library(ggfortify);
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(ggtern) # ternary plots
Preparing data
loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")
summary(loi)
## sample crucible wet_weight sample_wet
## Length:10 Min. :8.066 Min. :10.70 Min. :1.923
## Class :character 1st Qu.:8.461 1st Qu.:10.93 1st Qu.:2.326
## Mode :character Median :8.995 Median :11.48 Median :2.558
## Mean :8.892 Mean :11.39 Mean :2.496
## 3rd Qu.:9.174 3rd Qu.:11.75 3rd Qu.:2.701
## Max. :9.890 Max. :12.18 Max. :2.842
## dry_weight after_550_C after_950_C
## Min. :10.58 Min. :10.47 Min. :10.31
## 1st Qu.:10.85 1st Qu.:10.78 1st Qu.:10.60
## Median :11.43 Median :11.33 Median :11.16
## Mean :11.32 Mean :11.22 Mean :11.07
## 3rd Qu.:11.69 3rd Qu.:11.58 3rd Qu.:11.42
## Max. :12.13 Max. :12.06 Max. :11.94
summary(tga)
## Name Analysis;Date Batch Wet
## Length:12 Length:12 Length:12 Length:12
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
## Dry Mass_550 Mass_950
## Length:12 Length:12 Length:12
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))
# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)
# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name
# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)
loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100
# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100
Manual LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(loi$c950)
## $stats
## [1] 5.507166 5.756990 6.617026 7.738517 7.965070
##
## $n
## [1] 10
##
## $conf
## [1] 5.626976 7.607076
##
## $out
## numeric(0)
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)
# Removing duplicates from the next graph
rownames(loi)
## [1] "AA-1" "AA-2" "AA-3" "AA-4" "AA-5" "AA-6" "AA-7" "AA-8" "AA-9"
## [10] "AA-10"
loi <- subset(loi[1:47, ])
### ggplots later with context and type
barplot(loi$c550)
barplot(loi$c950)
# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)
ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
TGA LOI results
# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(tga$c550, tga$c950)
# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum.
boxplot.stats(tga$c950)
## $stats
## [1] 5.611934 6.118481 6.532557 7.126867 8.599873
##
## $n
## [1] 11
##
## $conf
## [1] 6.052174 7.012940
##
## $out
## [1] 4.097341
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])
ggplot(tga,
aes(c550, c950, label = rownames(tga))) +
geom_point(size=2) +
geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
xlab('Organic LOI (%)') +
ylab('Carbonate LOI (%)') +
theme(axis.title = element_text())
barplot(tga$c550)
barplot(tga$c950)
# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)
ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
geom_bar(stat = 'identity', position = 'stack') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Libraries
library(dplyr);
library(ggplot2);
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)
color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
## L a b
## Min. :51.37 Min. :3.880 Min. :15.92
## 1st Qu.:57.73 1st Qu.:4.402 1st Qu.:16.49
## Median :58.79 Median :4.555 Median :16.93
## Mean :58.18 Mean :5.067 Mean :17.32
## 3rd Qu.:59.58 3rd Qu.:4.695 3rd Qu.:17.32
## Max. :61.57 Max. :8.670 Max. :21.47
ggplot(color,
aes(a, b, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Color (a*b)") +
xlab("a D65") +
ylab("b D65")
ggplot(color1,
aes(X, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "Luminance") +
ylab("L* (D65)") +
xlab("Sample number")
ggplot(color,
aes(a, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "A versus luminance") +
ylab("L* (D65)") +
xlab("a")
ggplot(color,
aes(b, L, label = rownames(color))) +
geom_point(size=2) +
geom_text_repel(size=3) +
labs(title = "B versus luminance") +
ylab("L* (D65)") +
xlab("b")
# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L,
xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$a, color$b, color$L, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)
scatter3D(color$L, color$a, color$b,
xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
phi = 0, bty = "g", type = "h",
ticktype = "detailed", pch = 19, cex = 1.2)
text3D(color$L, color$a, color$b, labels = rownames(color),
add = TRUE, colkey = FALSE, cex = 0.7)